from io import StringIO

from pdfminer.converter import TextConverter
from pdfminer.high_level import extract_text
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
from pdfminer.pdfpage import PDFPage


def extract_text_by_page(file_path):
    """Extract the text of every page of a PDF, one string per page.

    The file is opened and parsed a single time and each page is rendered
    through its own text converter. This replaces calling ``extract_text``
    once per page, which reopened and re-parsed the whole document for
    every page.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A list of strings in page order; element ``i`` holds the text of
        the page at zero-based index ``i``. The list is empty when the
        document yields no pages.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (from ``open``).
    """
    text_list = []
    with open(file_path, 'rb') as file:
        # A single resource manager and layout configuration are shared by
        # all pages, as they would be inside one ``extract_text`` call.
        resource_manager = PDFResourceManager()
        layout_params = LAParams()
        for page in PDFPage.get_pages(file):
            # A fresh buffer and converter per page keep the pages' text
            # separate instead of accumulating into one string.
            with StringIO() as page_buffer:
                device = TextConverter(
                    resource_manager, page_buffer, laparams=layout_params
                )
                interpreter = PDFPageInterpreter(resource_manager, device)
                interpreter.process_page(page)
                text_list.append(page_buffer.getvalue())
    return text_list


def remove_spaces_and_empty_lines(text):
    """Strip ASCII spaces, normalize some punctuation, and drop blank lines.

    Every ASCII space character is removed, the full-width colon and
    parentheses are replaced by their ASCII counterparts, and lines that
    are empty or contain only whitespace are discarded.

    Args:
        text: The string to clean.

    Returns:
        The cleaned text, with the remaining lines joined by ``"\\n"`` and
        no trailing newline.
    """
    # Remove spaces and convert full-width punctuation in a single pass.
    substitutions = str.maketrans({" ": None, "：": ":", "）": ")", "（": "("})
    normalized = text.translate(substitutions)
    # Keep only the lines that still hold something other than whitespace.
    return "\n".join(row for row in normalized.splitlines() if row.strip())

if __name__ == "__main__":
    # Script entry point: print the per-page text list of a sample PDF.
    page_texts = extract_text_by_page("25119121152002893200.pdf")
    print(page_texts)
